Missingness across variants. Some ptids only have the ancestral marker measured. The missingness pattern is explained by the fact that samples from different steps have different set of markers measured. We decide to impute all variants if one variant marker exists.
Overall, “because the correlation among variants is pretty good for the non-naives and really the analysis focuses on the non-naives” (Peter), we decide to impute variants for those who have data for at least one variant to make the complexity of data processing more manageable.
dat1=dat_mapped
# the pattern in cases is un-remarkable
# dat1=subset(dat_mapped, 1==EventIndPrimaryD1)
for (t in 2:1) {
cat(paste0("stage ", t, '\n'))
for (tp in c("B", "Day22", "Day43")) {
# placebo has a similar pattern
for(trt in 1) {
myprint(tp, trt)
dat=subset(dat1, Trialstage==t & ph1.D43 & Trt==trt) # pattern is independent of Bserostatus
for (i in 1:5) {
dat[['tmp'%.%i]]=ifelse(is.na(dat[[tp%.%assays[i]]]), 0, 1)
}
dat$tmp=with(dat,paste0(tmp1,tmp2,tmp3,tmp4,tmp5))
print(table(dat$tmp))
cat("\n")
}
}
}
## stage 2
## tp = B ; trt = 1
##
## 00000 11111
## 4619 485
##
## tp = Day22 ; trt = 1
##
## 00000 11111
## 4620 484
##
## tp = Day43 ; trt = 1
##
## 00000 11111
## 4621 483
##
## stage 1
## tp = B ; trt = 1
##
## 00000 11111
## 3605 487
##
## tp = Day22 ; trt = 1
##
## 00000 01111 10111 11111
## 3604 1 1 486
##
## tp = Day43 ; trt = 1
##
## 00000 11111
## 3608 484
Missingness across baseline, d22 and d43, for the ancestral markers in the vaccine arms. The results support the decision “to keep it simple and require for RIS membership availability of data at all 3 time points” (Peter).
for(trt in 1:0) {
myprint(trt)
for (i in 2:1) {
cat(paste0("stage ", i, '\n'))
dat=subset(dat_proc, Trialstage==i & Trt==trt & ph1.D43)
times=c("B","Day22","Day43")
for (i in 1:3) {
dat[['tmp'%.%i]]=ifelse(is.na(dat[[times[i]%.%'pseudoneutid50']]), 0, 1)
}
dat$tmp=with(dat,paste0(tmp1,tmp2,tmp3))
print(table(dat$tmp))
cat("\n")
}
}
## trt = 1
## stage 2
##
## 000 001 011 101 110 111
## 4338 4 6 1 11 713
##
## stage 1
##
## 000 010 011 100 101 110 111
## 2961 12 28 45 15 29 956
##
## trt = 0
## stage 2
##
## 000 001 011 100 101 110 111
## 4377 2 5 1 3 2 562
##
## stage 1
##
## 000 001 010 011 100 101 110 111
## 3210 2 3 10 31 10 24 827
# a more detailed look at missingness by batch
# for (tp in c("B")) {
# #for (tp in c("B", "Day22", "Day43")) {
# # before imputation
# dat1=dat_mapped
# # after imputation
# # dat1=dat_proc
#
# for(trt in 1) {
# myprint(tp, trt)
# for (i in 2:1) { # stage
# cat(paste0("stage ", i, '\n'))
# if (trt==2) {
# # pool over arms
# dat=subset(dat1, Trialstage==i & ph1.D43)
# } else {
# dat=subset(dat1, Trialstage==i & ph1.D43 & Trt==trt)
# }
# for (i in 1:5) {
# dat[['tmp'%.%i]]=ifelse(is.na(dat[[tp%.%assays[i]]]), 0, 1)
# }
# dat$tmp=with(dat,paste0(tmp1,tmp2,tmp3,tmp4,tmp5))
# print(mytable(dat$EventIndPrimaryD43, dat$tmp, dat$nAbBatch, dat$Bserostatus))
# }
# }
#
# }
The number of D43 nAb ph2 samples by Bserostatus 0/1 (row) and Case 1/0 (column) and broken down by treatment arms (vaccine, placebo).
dat1=dat_proc
f=function(tab2, tab1, tab0) {
for (i in 1:2) {
print(paste0(tab2[i,1], " (", tab1[i,1], ", ", tab0[i,1], ") ", tab2[i,2], " (", tab1[i,2], ", ", tab0[i,2], ")"))
}
}
for (i in 2:1) {
cat(paste0("stage ", i, '\n'))
dat=subset(dat1, Trialstage==i & ph1.D43 & TwophasesampIndD43nAb)
tab2 = table(dat$Bserostatus, dat$EventIndPrimaryD43)[,2:1] # [,2:1] moves cases before non-cases
dat=subset(dat1, Trialstage==i & ph1.D43 & Trt==1 & TwophasesampIndD43nAb)
tab1 = table(dat$Bserostatus, dat$EventIndPrimaryD43)[,2:1] # [,2:1] moves cases before non-cases
dat=subset(dat1, Trialstage==i & ph1.D43 & Trt==0 & TwophasesampIndD43nAb)
tab0 = table(dat$Bserostatus, dat$EventIndPrimaryD43)[,2:1] # [,2:1] moves cases before non-cases
f(tab2, tab1, tab0)
}
## stage 2
## [1] "41 (17, 24) 232 (179, 53)"
## [1] "122 (61, 61) 884 (457, 427)"
## stage 1
## [1] "150 (74, 76) 630 (382, 248)"
## [1] "179 (68, 111) 849 (447, 402)"
Correlation is high between variant markers in Stage 2 non-naive, vaccine arm at both baseline and D43
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43)[,"B"%.%nassays])
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43)[,"Day43"%.%nassays])
Correlation is medium between variant markers at D43 in Stage 2 naive, vaccine arm. (The correlation between the ancestral marker and the other markers range between 0.63 and 0.67.)
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==0 & Trt==1 & ph1.D43)[,"Day43"%.%nassays])
Correlation is low between baseline and D22 or D43 in Stage 2 non-naive, vaccine arm.
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43)[,c("B","Day22","Day43")%.%assays[1]])
In stage 1, correlation is medium between ancestral markers and other markers. Correlation is high between BA4BA5 and BA1 among non-naive, but medium among naive.
mypairs(subset(dat_proc, Trialstage==1 & Bserostatus==1 & Trt==1 & ph1.D43)[,"Day22"%.%nassays])
mypairs(subset(dat_proc, Trialstage==1 & Bserostatus==0 & Trt==1 & ph1.D43)[,"Day22"%.%nassays])
A closer look at the relationship between baseline and Day 43 markers in the baseline positive. Note that the D43 markers appear saturated since level seems flat across different baseline marker levels.
par(mfrow=c(2,4))
# ancestral marker
corplot(Day43pseudoneutid50~Bpseudoneutid50, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), ylim=c(0,4))
myboxplot(Day43pseudoneutid50~I(Bpseudoneutid50>0.2), subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), names=c("Undetectable at B","Detectable at B"), ylab="D43", ylim=c(0,4))
my.interaction.plot(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43 & !is.na(Bpseudoneutid50) & !is.na(Day43pseudoneutid50), c(Bpseudoneutid50, Day43pseudoneutid50)),
x.ori = 0, xaxislabels = c("B", "D43"), cex.axis = 1, add = FALSE, xlab = "", ylab = "", pcol = NULL, lcol = NULL)
corplot(Delta43overBpseudoneutid50~Bpseudoneutid50, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43))
# BA.1
corplot(Day43pseudoneutid50_BA.1~Bpseudoneutid50_BA.1, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), ylim=c(0,4))
myboxplot(Day43pseudoneutid50_BA.1~I(Bpseudoneutid50_BA.1>0.2), subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), names=c("Undetectable at B","Detectable at B"), ylab="D43", ylim=c(0,4))
my.interaction.plot(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43 & !is.na(Bpseudoneutid50_BA.1) & !is.na(Day43pseudoneutid50_BA.1), c(Bpseudoneutid50_BA.1, Day43pseudoneutid50_BA.1)),
x.ori = 0, xaxislabels = c("B", "D43"), cex.axis = 1, add = FALSE, xlab = "", ylab = "", pcol = NULL, lcol = NULL)
corplot(Delta43overBpseudoneutid50_BA.1~Bpseudoneutid50_BA.1, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43))
Missingness across variants is mostly all or none. We decide to impute all variants if one variant marker exists.
# before imputation
dat1=dat_mapped
# after imputation
# dat1=dat_proc
for (t in 2:1) {
cat(paste0("stage ", t, '\n'))
# for (tp in c("B")) {
for (tp in c("B", "Day22", "Day43")) {
# placebo has a similar patten
for(trt in 1) {
myprint(tp, trt)
if (trt==2) {
# pool over arms
dat=subset(dat1, Trialstage==t & ph1.D43)
} else {
dat=subset(dat1, Trialstage==t & ph1.D43 & Trt==trt)
}
for (i in 1:8) {
dat[['tmp'%.%i]]=ifelse(is.na(dat[[tp%.%assays[i]]]), 0, 1)
}
dat$tmp=with(dat,paste0(tmp1,tmp2,tmp3,tmp4,tmp5,tmp6,tmp7,tmp8))
print(table(dat$tmp))
cat("\n")
# print(table(dat$EventIndPrimaryD43, dat$tmp, dat$Bserostatus))
}
}
}
## stage 2
## tp = B ; trt = 1
##
## 00000000 11111111
## 4619 485
##
## tp = Day22 ; trt = 1
##
## 00000000 11111111
## 4620 484
##
## tp = Day43 ; trt = 1
##
## 00000000 11111111
## 4621 483
##
## stage 1
## tp = B ; trt = 1
##
## 00000000 11111111
## 3605 487
##
## tp = Day22 ; trt = 1
##
## 00000000 01111110 10111111 11111101 11111110 11111111
## 3604 1 1 1 5 480
##
## tp = Day43 ; trt = 1
##
## 00000000 11111111
## 3608 484
Missingness across baseline, d22 and d43, for the ancestral markers in the vaccine arms.
for(trt in 1:0) {
myprint(trt)
for (i in 2:1) {
cat(paste0("stage ", i, '\n'))
dat=subset(dat_proc, Trialstage==i & Trt==trt & ph1.D43)
times=c("B","Day22","Day43")
for (i in 1:3) {
dat[['tmp'%.%i]]=ifelse(is.na(dat[[times[i]%.%'bindSpike']]), 0, 1)
}
dat$tmp=with(dat,paste0(tmp1,tmp2,tmp3))
print(table(dat$tmp))
cat("\n")
}
}
## trt = 1
## stage 2
##
## 000 101 110 111
## 4590 1 2 480
##
## stage 1
##
## 000 010 011 100 101 110 111
## 3560 2 1 1 1 3 478
##
## trt = 0
## stage 2
##
## 000 111
## 4575 377
##
## stage 1
##
## 000 011 101 110 111
## 3697 3 2 11 404
Correlation is high between variant markers in Stage 2 non-naive, vaccine arm at baseline
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43)[,"B"%.%bassays])
and D43
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43)[,"Day43"%.%bassays])
Correlation is still high between variant markers at D43 in Stage 2 naive, vaccine arm. (The correlation between the ancestral marker and the other markers range between 0.63 and 0.67.)
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==0 & Trt==1 & ph1.D43)[,"Day43"%.%bassays])
Correlation is low between baseline and D22 or D43 in Stage 2 non-naive, vaccine arm.
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43)[,c("B","Day22","Day43")%.%assays[1]])
mypairs(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43)[,c("B","Day22","Day43")%.%assays[8]])
A closer look at the relationship between baseline and Day 43 markers in the baseline positive. Note that the D43 markers appear saturated since level seems flat across different baseline marker levels.
par(mfrow=c(2,4))
# ancestral marker
corplot(Day43bindSpike~BbindSpike, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), ylim=c(0,4))
myboxplot(Day43bindSpike~I(BbindSpike>0.5), subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), names=c("Undetectable at B","Detectable at B"), ylab="D43", ylim=c(0,4))
my.interaction.plot(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43 & !is.na(BbindSpike) & !is.na(Day43bindSpike), c(BbindSpike, Day43bindSpike)),
x.ori = 0, xaxislabels = c("B", "D43"), cex.axis = 1, add = FALSE, xlab = "", ylab = "", pcol = NULL, lcol = NULL)
corplot(Delta43overBbindSpike~BbindSpike, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43))
# omicron
corplot(Day43bindSpike_omicron~BbindSpike_omicron, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), ylim=c(0,4))
myboxplot(Day43bindSpike_omicron~I(BbindSpike_omicron>0.5), subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), names=c("Undetectable at B","Detectable at B"), ylab="D43", ylim=c(0,4))
my.interaction.plot(subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43 & !is.na(BbindSpike_omicron) & !is.na(Day43bindSpike_omicron), c(BbindSpike_omicron, Day43bindSpike_omicron)),
x.ori = 0, xaxislabels = c("B", "D43"), cex.axis = 1, add = FALSE, xlab = "", ylab = "", pcol = NULL, lcol = NULL)
corplot(Delta43overBbindSpike_omicron~BbindSpike_omicron, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43))
missingness pattern across nAb and bAb markers shows that there is no overlap in pattern between nAb and nAb in stage 1 or 2.
dat1=dat_mapped
for (t in 2:1) {
cat(paste0("stage ", t, '\n'))
for (tp in c("Day43")) {
# placebo has a similar pattern
for(trt in 1) {
myprint(tp, trt)
dat=subset(dat1, Trialstage==t & ph1.D43 & Trt==trt)
dat$tmp1 = ifelse(is.na(dat$Day43pseudoneutid50), 0, 1)
dat$tmp2 = ifelse(is.na(dat$Day43pseudoneutid50_B.1.351), 0, 1)
dat$tmp3 = ifelse(is.na(dat$Day43bindSpike), 0, 1)
dat$tmp=with(dat,paste0(tmp1,tmp2))
print(table(dat$tmp))
dat$tmp=with(dat,paste0(tmp1,tmp3))
print(table(dat$tmp))
dat$tmp=with(dat,paste0(tmp2,tmp3))
print(table(dat$tmp))
cat("\n")
}
}
}
## stage 2
## tp = Day43 ; trt = 1
##
## 00 01 10 11
## 4382 3 20 699
##
## 00 01 10 11
## 4365 20 256 463
##
## 00 01 10 11
## 4369 33 252 450
##
## stage 1
## tp = Day43 ; trt = 1
##
## 00 01 10 11
## 3074 5 431 582
##
## 00 01 10 11
## 3072 7 536 477
##
## 00 01 10 11
## 3472 33 136 451
Omicron bAb ULOQ may be too low.
corplot(Day43bindSpike_omicron~Day43bindSpike, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), add.diagonal.line = F, method="pearson")
corplot(Day43pseudoneutid50_BA.4.5~Day43pseudoneutid50, subset(dat_proc, Trialstage==2 & Bserostatus==1 & Trt==1 & ph1.D43), add.diagonal.line = F, method="pearson")
correlation and missingness across bAb and nAb
for (sero in c(0,1)) {
for (trt in c(0,1)) {
for (stage in c(1,2)) {
myprint(stage, trt, sero)
dat=subset(dat_mapped, Trialstage==stage & Trt==trt & Bserostatus==sero)
corplot(Day43bindSpike~Day43pseudoneutid50, dat)
print(table(!is.na(dat$Day43bindSpike), !is.na(dat$Day43pseudoneutid50), dat$EventIndPrimaryD43))
}
}
}
## stage = 1 ; trt = 0 ; sero = 0
## , , = 0
##
##
## FALSE TRUE
## FALSE 555 246
## TRUE 0 37
##
## , , = 1
##
##
## FALSE TRUE
## FALSE 7 10
## TRUE 0 85
##
## stage = 2 ; trt = 0 ; sero = 0
## , , = 0
##
##
## FALSE TRUE
## FALSE 263 36
## TRUE 2 32
##
## , , = 1
##
##
## FALSE TRUE
## FALSE 1 1
## TRUE 3 35
##
## stage = 1 ; trt = 1 ; sero = 0
## , , = 0
##
##
## FALSE TRUE
## FALSE 379 271
## TRUE 4 239
##
## , , = 1
##
##
## FALSE TRUE
## FALSE 4 6
## TRUE 0 93
##
## stage = 2 ; trt = 1 ; sero = 0
## , , = 0
##
##
## FALSE TRUE
## FALSE 93 55
## TRUE 14 166
##
## , , = 1
##
##
## FALSE TRUE
## FALSE 0 0
## TRUE 5 23
##
## stage = 1 ; trt = 0 ; sero = 1
## , , = 0
##
##
## FALSE TRUE
## FALSE 2785 210
## TRUE 21 183
##
## , , = 1
##
##
## FALSE TRUE
## FALSE 2 7
## TRUE 9 96
##
## stage = 2 ; trt = 0 ; sero = 1
## , , = 0
##
##
## FALSE TRUE
## FALSE 4170 166
## TRUE 14 244
##
## , , = 1
##
##
## FALSE TRUE
## FALSE 6 2
## TRUE 9 59
##
## stage = 1 ; trt = 1 ; sero = 1
## , , = 0
##
##
## FALSE TRUE
## FALSE 2741 306
## TRUE 4 166
##
## , , = 1
##
##
## FALSE TRUE
## FALSE 3 6
## TRUE 0 63
##
## stage = 2 ; trt = 1 ; sero = 1
## , , = 0
##
##
## FALSE TRUE
## FALSE 4265 209
## TRUE 9 248
##
## , , = 1
##
##
## FALSE TRUE
## FALSE 5 1
## TRUE 1 64
Note that the marker level in the NN, placebo is higher in Stage 2 than in Stage 1, probably reflecting different countries.
myboxplot(Day43bindSpike~Trt+Bserostatus+Trialstage, dat_mapped, ylab="D43 bAb Spike",
names=rep(c("N,pla", "N,vac", "NN,pla", "NN,vac"),2),
main="Stage 1 Stage 2",
cex.axis=0.8)
abline(v=4.5)
We have list of ptids sampled in step 2 in both stages of the trial.
stage2step2=read.csv("~/Aiying700.csv")
dat_mapped$stage2step2 = dat_mapped$Subjectid %in% stage2step2$Unique.Subject.Identifier
dat_proc$stage2step2 = dat_proc$Ptid %in% stage2step2$Unique.Subject.Identifier
stage1step2=read.csv("~/JinStage1Step2.csv")
dat_mapped$stage1step2 = dat_mapped$SUBJID %in% stage1step2$SUBJID
dat_proc$stage1step2 = dat_proc$SUBJID %in% stage1step2$SUBJID
Stage 2, nAb. batch 1 = step2 for both cases and noncases
with(subset(dat_proc, Trt==1 & Trialstage==2 & ph2.D43.nAb), table(stage2step2, EventIndPrimaryD1, nAbBatch))
## , , nAbBatch = 1
##
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 0 0
## TRUE 286 24
##
## , , nAbBatch = 2
##
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 350 54
## TRUE 0 0
with(subset(dat_proc, Trt==0 & Trialstage==2 & ph2.D43.nAb), table(stage2step2, EventIndPrimaryD1, nAbBatch))
## , , nAbBatch = 1
##
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 0 0
## TRUE 229 30
##
## , , nAbBatch = 2
##
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 251 55
## TRUE 0 0
with(subset(dat_proc, Trt==1 & Trialstage==2 & ph2.D22.nAb), table(stage2step2, EventIndPrimaryD1, nAbBatch))
## , , nAbBatch = 1
##
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 0 0
## TRUE 286 40
##
## , , nAbBatch = 2
##
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 358 58
## TRUE 0 0
with(subset(dat_proc, Trt==0 & Trialstage==2 & ph2.D22.nAb), table(stage2step2, EventIndPrimaryD1, nAbBatch))
## , , nAbBatch = 1
##
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 0 0
## TRUE 228 81
##
## , , nAbBatch = 2
##
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 254 57
## TRUE 0 0
Stage 1, nAb. Most non-cases are done in batch 2, most cases are done in batch 1. Step 1 and 2 are spread mostly equally between batches.
with(subset(dat_proc, Trialstage==1 & ph2.D43.nAb), table(stage1step2, nAbBatch, EventIndPrimaryD1))
## , , EventIndPrimaryD1 = 0
##
## nAbBatch
## stage1step2 1 2
## FALSE 24 576
## TRUE 5 175
##
## , , EventIndPrimaryD1 = 1
##
## nAbBatch
## stage1step2 1 2
## FALSE 221 68
## TRUE 32 6
bAb.
# stage 1
with(subset(dat_proc, Trialstage==1 & ph2.D22.bAb), table(stage1step2, EventIndPrimaryD1))
## EventIndPrimaryD1
## stage1step2 0 1
## FALSE 478 298
## TRUE 129 43
# stage 2
with(subset(dat_proc, Trialstage==2 & ph2.D22.bAb), table(stage2step2, EventIndPrimaryD1))
## EventIndPrimaryD1
## stage2step2 0 1
## FALSE 341 126
## TRUE 347 125